Data load

DREAM model

#L1 = getContrast( vobjDream, form, merged_meta_mono, c("treatmentLPS", "treatmentBasal"))
#L2 = getContrast( vobjDream, form, merged_meta_mono, c("treatmentIFNg", "treatmentBasal"))

#L = cbind(L1, L2)
#fit = dream(vobjDream, form, merged_meta_mono, L)
#res_LPS_mono <- data.frame(topTable(fit, coef='L1', number=nrow(genes_counts_filt1), sort.by = "p"), check.names = F)
#res_IFNy_mono <- data.frame(topTable(fit, coef='L2', number=nrow(genes_counts_filt1), sort.by = "p"), check.names = F)
# Gene annotation lookup read with read.table() defaults; its two columns are
# named "ensembl" and "symbol" so results can be merged on the Ensembl ID.
gencode_30 <- read.table("~/Documents/MiGASti/Databases/ens.geneid.gencode.v30")
names(gencode_30) <- c("ensembl", "symbol")
#res_LPS_mono <- tibble::rownames_to_column(res_LPS_mono, "ensembl")
#res_LPS_monocytes <- merge(res_LPS_mono, gencode_30, by = "ensembl")
#res_IFNy_mono <- tibble::rownames_to_column(res_IFNy_mono, "ensembl")
#res_IFNy_monocytes <- merge(res_IFNy_mono, gencode_30, by = "ensembl")
#save(res_LPS_monocytes, file = "res_LPS_monocytes.Rdata")
#save(res_IFNy_monocytes, file = "res_IFNy_monocytes.Rdata")

LPS

DE genes 15% LPS

# Load the saved monocyte LPS result table and count the genes whose adjusted
# p-value is below 0.15 (rows with a missing adjusted p-value are excluded).
load("~/Documents/MiGASti/Databases/res_LPS_monocytes.Rdata")
sign_LPS <- res_LPS_monocytes[which(res_LPS_monocytes$adj.P.Val < 0.15), ]
nrow(sign_LPS)
## [1] 11205

DE genes 10% LPS

# Number of monocyte LPS genes with adjusted p-value below 0.10.
sign_LPS <- res_LPS_monocytes[which(res_LPS_monocytes$adj.P.Val < 0.10), ]
nrow(sign_LPS)
## [1] 10841

DE genes 5% LPS

# Number of monocyte LPS genes with adjusted p-value below 0.05.
sign_LPS <- res_LPS_monocytes[which(res_LPS_monocytes$adj.P.Val < 0.05), ]
nrow(sign_LPS)
## [1] 10219

P-value distribution

# `res` is reused by the fold-change and MA/volcano sections that follow.
res <- res_LPS_monocytes

# Density of the raw p-values (column P.Value). The plot was previously titled
# "FDR Distribution" although the adjusted p-values (adj.P.Val) are not what is
# drawn; the title now states what the plot shows.
p <- ggplot(res, aes(P.Value))
p +
  geom_density(color = "darkblue", fill = "lightblue") +
  theme_classic() +
  ggtitle("P-value Distribution")

Fold change distribution

# Density of the log fold changes in `res`.
p <- ggplot(res, aes(logFC))
p +
  geom_density(fill = "lightblue", color = "darkblue") +
  theme_classic() +
  ggtitle("Fold Change Distribution")

MA plot

# Build the plotting table shared by the MA plot and the volcano plot below.
plot.data <- res
plot.data$id <- rownames(plot.data)
data <- data.frame(plot.data)

# P.Value is overwritten with -log10(p); the volcano plot uses it as its y axis.
data$P.Value <- -log10(data$P.Value)

# 0/1 flag for adj.P.Val < 0.05. The column name `fifteen` is kept because the
# volcano plot maps colour to it. Both levels are declared explicitly so the
# factor does not depend on which classes happen to be present.
data$fifteen <- factor(as.integer(data$adj.P.Val < 0.05), levels = c(0, 1))

ma <- ggplot(data, aes(AveExpr, logFC, color = fifteen))
ma +
  geom_point() +
  # Colours and labels are matched to levels by name; matching by position
  # would mislabel the legend if only one class were present.
  scale_color_manual(
    values = c("0" = "black", "1" = "red"),
    labels = c("0" = "> 0.05", "1" = "< 0.05")
  ) +
  labs(title = "MA plot", color = "FDR") +
  theme_classic()

#theme(plot.title = element_text(hjust = 0.5)) + ylim (-10,10) + xlim(-4,22)

Volcano plot

# Volcano plot: logFC against -log10(p) (data$P.Value was transformed in the
# MA-plot section), coloured by the 0/1 significance flag `fifteen`.
vp <- ggplot(data, aes(logFC, P.Value, color = fifteen))
vp +
  geom_point() +
  # Match colours/labels to the "0"/"1" levels by name so the legend stays
  # correct even when only one class is present.
  scale_color_manual(
    values = c("0" = "black", "1" = "red"),
    labels = c("0" = "> 0.05", "1" = "< 0.05")
  ) +
  labs(title = "Gene Level Volcano Plot", color = "FDR") +
  #theme(plot.title = element_text(hjust = 0.5)) +
  theme_classic() +
  # xlim()/ylim() drop points outside the window (hence the "Removed rows"
  # warning in the rendered output).
  xlim(-5, 5) +
  ylim(0, 30) +
  ylab("-log10 pvalue")
## Warning: Removed 269 rows containing missing values (geom_point).

Data table for download

# Columns exposed in the downloadable table, in display order.
lps_table_cols <- c(
  "ensembl", "symbol", "logFC", "AveExpr",
  "t", "P.Value", "adj.P.Val", "z.std"
)
res_LPS_diff_top <- res_LPS_monocytes[, lps_table_cols]
createDT(res_LPS_diff_top)
## Warning in instance$preRenderHook(instance): It seems your data is too big
## for client-side DataTables. You may consider server-side processing: https://
## rstudio.github.io/DT/server.html

IFNy

DE genes 15% IFNy

# Load the saved monocyte IFNy result table and count the genes whose adjusted
# p-value is below 0.15 (rows with a missing adjusted p-value are excluded).
load("~/Documents/MiGASti/Databases/res_IFNy_monocytes.Rdata")
sign_IFNy <- res_IFNy_monocytes[which(res_IFNy_monocytes$adj.P.Val < 0.15), ]
nrow(sign_IFNy)
## [1] 9020

DE genes 10% IFNy

# Number of monocyte IFNy genes with adjusted p-value below 0.10.
sign_IFNy <- res_IFNy_monocytes[which(res_IFNy_monocytes$adj.P.Val < 0.10), ]
nrow(sign_IFNy)
## [1] 8441

DE genes 5% IFNy

# Number of monocyte IFNy genes with adjusted p-value below 0.05.
sign_IFNy <- res_IFNy_monocytes[which(res_IFNy_monocytes$adj.P.Val < 0.05), ]
nrow(sign_IFNy)
## [1] 7604

P-value distribution

# `res` is reused by the fold-change and MA/volcano sections that follow.
res <- res_IFNy_monocytes

# Density of the raw p-values (column P.Value). The plot was previously titled
# "FDR Distribution" although the adjusted p-values (adj.P.Val) are not what is
# drawn; the title now states what the plot shows.
p <- ggplot(res, aes(P.Value))
p +
  geom_density(color = "darkblue", fill = "lightblue") +
  theme_classic() +
  ggtitle("P-value Distribution")

Fold change distribution

# Density of the log fold changes in `res`.
p <- ggplot(res, aes(logFC))
p +
  geom_density(fill = "lightblue", color = "darkblue") +
  theme_classic() +
  ggtitle("Fold Change Distribution")

MA plot

# Build the plotting table shared by the MA plot and the volcano plot below.
plot.data <- res
plot.data$id <- rownames(plot.data)
data <- data.frame(plot.data)

# P.Value is overwritten with -log10(p); the volcano plot uses it as its y axis.
data$P.Value <- -log10(data$P.Value)

# 0/1 flag for adj.P.Val < 0.05. The column name `fifteen` is kept because the
# volcano plot maps colour to it. Both levels are declared explicitly so the
# factor does not depend on which classes happen to be present.
data$fifteen <- factor(as.integer(data$adj.P.Val < 0.05), levels = c(0, 1))

ma <- ggplot(data, aes(AveExpr, logFC, color = fifteen))
ma +
  geom_point() +
  # Colours and labels are matched to levels by name; matching by position
  # would mislabel the legend if only one class were present.
  scale_color_manual(
    values = c("0" = "black", "1" = "red"),
    labels = c("0" = "> 0.05", "1" = "< 0.05")
  ) +
  labs(title = "MA plot", color = "FDR") +
  theme_classic()

#theme(plot.title = element_text(hjust = 0.5)) + ylim (-10,10) + xlim(-4,22)

Volcano plot

# Volcano plot: logFC against -log10(p) (data$P.Value was transformed in the
# MA-plot section), coloured by the 0/1 significance flag `fifteen`.
vp <- ggplot(data, aes(logFC, P.Value, color = fifteen))
vp +
  geom_point() +
  # Match colours/labels to the "0"/"1" levels by name so the legend stays
  # correct even when only one class is present.
  scale_color_manual(
    values = c("0" = "black", "1" = "red"),
    labels = c("0" = "> 0.05", "1" = "< 0.05")
  ) +
  labs(title = "Gene Level Volcano Plot", color = "FDR") +
  #theme(plot.title = element_text(hjust = 0.5)) +
  theme_classic() +
  # xlim()/ylim() drop points outside the window (hence the "Removed rows"
  # warning in the rendered output).
  xlim(-5, 5) +
  ylim(0, 30) +
  ylab("-log10 pvalue")
## Warning: Removed 133 rows containing missing values (geom_point).

Data table for download

# Columns exposed in the downloadable table, in display order.
ifny_table_cols <- c(
  "ensembl", "symbol", "logFC", "AveExpr",
  "t", "P.Value", "adj.P.Val", "z.std"
)
res_IFNy_diff_top <- res_IFNy_monocytes[, ifny_table_cols]
createDT(res_IFNy_diff_top)
## Warning in instance$preRenderHook(instance): It seems your data is too big
## for client-side DataTables. You may consider server-side processing: https://
## rstudio.github.io/DT/server.html

Overlap with significant microglia LPS genes FDR 5%

# Load the microglia LPS result table, keep the genes with adjusted p-value
# below 0.05 in microglia and in monocytes, and extract their symbols.
load("~/Documents/MiGASti/docs/2nd_pass/res_LPS_name.Rdata")
sign_microglia_LPS <- res_LPS_name[which(res_LPS_name$adj.P.Val < 0.05), ]
sign_monocytes_LPS <- res_LPS_monocytes[which(res_LPS_monocytes$adj.P.Val < 0.05), ]

# Rows significant in both tables, joined on gene symbol.
overlap <- merge(sign_microglia_LPS, sign_monocytes_LPS, by = "symbol")

set1 <- sign_microglia_LPS
set2 <- sign_monocytes_LPS

set1_v <- set1[["symbol"]]
set2_v <- set2[["symbol"]]

# One-sided Fisher's exact test for over-representation of the overlap between
# two sets.
#
# set1, set2: vectors of identifiers (here gene symbols). Duplicates and NA
#   entries are removed first; previously a repeated symbol was counted once
#   per occurrence and NA matched NA, which inflated the overlap.
# universe:   total number of identifiers the sets are drawn from.
#   NOTE(review): 20000 is a fixed default; the number of genes actually
#   tested may be more appropriate - confirm.
#
# Returns a one-row table (a tibble when the tibble package is installed,
# otherwise a data.frame) with columns set1_length, set2_length, overlap,
# ratio (odds-ratio estimate from fisher.test) and p.value.
setEnrichment <- function(set1, set2, universe = 20000){

  set1 <- unique(set1[!is.na(set1)])
  set2 <- unique(set2[!is.na(set2)])

  # 2 x 2 contingency counts
  n_both  <- sum(set1 %in% set2)
  n_only1 <- length(set1) - n_both
  n_only2 <- length(set2) - n_both
  n_none  <- universe - length(set2) - n_only1

  if (n_none < 0) {
    stop("`universe` (", universe, ") is smaller than the union of the two sets (",
         n_both + n_only1 + n_only2, ")", call. = FALSE)
  }

  contingency_table <- matrix(c(n_both, n_only1, n_only2, n_none), nrow = 2)

  # one-tailed test for enrichment only
  fisher_results <- fisher.test(contingency_table, alternative = "greater")

  df <- data.frame(
    set1_length = length(set1),
    set2_length = length(set2),
    overlap     = n_both,
    ratio       = unname(fisher_results$estimate),
    p.value     = fisher_results$p.value
  )

  # Keep returning a tibble wherever tibble is available (as before).
  if (requireNamespace("tibble", quietly = TRUE)) {
    df <- tibble::as_tibble(df)
  }

  df
}

# Enrichment of the microglia/monocyte overlap (arguments: set1, set2).
setEnrichment(set1_v, set2_v)

Overlap with significant microglia IFNy genes FDR 5%

# Microglia vs monocytes, IFNy, adjusted p-value < 0.05 in both.
load("~/Documents/MiGASti/docs/2nd_pass/res_IFNy_name.Rdata")
sign_microglia_IFNy <- subset(res_IFNy_name, adj.P.Val < 0.05)
sign_monocytes_IFNy <- subset(res_IFNy_monocytes, adj.P.Val < 0.05)
overlap <- merge(sign_microglia_IFNy, sign_monocytes_IFNy, by = "symbol")
set1 <- sign_microglia_IFNy
set2 <- sign_monocytes_IFNy

set1_v <- set1$symbol
set2_v <- set2$symbol

# setEnrichment() is defined once, in the first overlap section of this
# document (microglia vs monocytes LPS, FDR 5%); the identical copy that was
# repeated here has been dropped.
setEnrichment(set1 = set1_v, set2 = set2_v)

Overlap with significant microglia LPS genes FDR 5% log FC > 1 microglia and monocytes > 5

# LPS: microglia logFC > 1 vs monocytes logFC > 5 (both adj.P.Val < 0.05).
sign_microglia_LPS <- subset(res_LPS_name, adj.P.Val < 0.05)
sign_microglia_LPS_LF <- subset(sign_microglia_LPS, logFC > 1)
sign_monocytes_LPS <- subset(res_LPS_monocytes, adj.P.Val < 0.05)
sign_monocytes_LPS_LF <- subset(sign_monocytes_LPS, logFC > 5)

# Fixed: the monocyte table used to be merged with itself, so `overlap` never
# contained the microglia/monocyte intersection.
overlap <- merge(sign_microglia_LPS_LF, sign_monocytes_LPS_LF, by = "symbol")
set1 <- sign_microglia_LPS_LF
set2 <- sign_monocytes_LPS_LF

set1_v <- set1$symbol
set2_v <- set2$symbol

# setEnrichment() is defined once, in the first overlap section of this
# document (microglia vs monocytes LPS, FDR 5%); the identical copy that was
# repeated here has been dropped.
setEnrichment(set1 = set1_v, set2 = set2_v)

Overlap with significant microglia LPS genes FDR 5% log FC < -1 microglia and monocytes < -5

# LPS: microglia logFC < -1 vs monocytes logFC < -5 (both adj.P.Val < 0.05).
sign_microglia_LPS <- subset(res_LPS_name, adj.P.Val < 0.05)
sign_microglia_LPS_LF <- subset(sign_microglia_LPS, logFC < -1)
sign_monocytes_LPS <- subset(res_LPS_monocytes, adj.P.Val < 0.05)
sign_monocytes_LPS_LF <- subset(sign_monocytes_LPS, logFC < -5)

# Fixed: the monocyte table used to be merged with itself, so `overlap` never
# contained the microglia/monocyte intersection.
overlap <- merge(sign_microglia_LPS_LF, sign_monocytes_LPS_LF, by = "symbol")
set1 <- sign_microglia_LPS_LF
set2 <- sign_monocytes_LPS_LF

set1_v <- set1$symbol
set2_v <- set2$symbol

# setEnrichment() is defined once, in the first overlap section of this
# document (microglia vs monocytes LPS, FDR 5%); the identical copy that was
# repeated here has been dropped.
setEnrichment(set1 = set1_v, set2 = set2_v)

Overlap with significant microglia IFNy genes FDR 5% log FC > 1 microglia and monocytes > 5

# IFNy: microglia logFC > 1 vs monocytes logFC > 5 (both adj.P.Val < 0.05).
sign_microglia_IFNy <- subset(res_IFNy_name, adj.P.Val < 0.05)
sign_microglia_IFNy_LF <- subset(sign_microglia_IFNy, logFC > 1)
sign_monocytes_IFNy <- subset(res_IFNy_monocytes, adj.P.Val < 0.05)
sign_monocytes_IFNy_LF <- subset(sign_monocytes_IFNy, logFC > 5)

# Fixed: the monocyte table used to be merged with itself, so `overlap` never
# contained the microglia/monocyte intersection.
overlap <- merge(sign_microglia_IFNy_LF, sign_monocytes_IFNy_LF, by = "symbol")
set1 <- sign_microglia_IFNy_LF
set2 <- sign_monocytes_IFNy_LF

set1_v <- set1$symbol
set2_v <- set2$symbol

# setEnrichment() is defined once, in the first overlap section of this
# document (microglia vs monocytes LPS, FDR 5%); the identical copy that was
# repeated here has been dropped.
setEnrichment(set1 = set1_v, set2 = set2_v)

Overlap with significant microglia IFNy genes FDR 5% log FC < -1 microglia and monocytes < -5

# IFNy: microglia logFC < -1 vs monocytes logFC < -5 (both adj.P.Val < 0.05).
sign_microglia_IFNy <- subset(res_IFNy_name, adj.P.Val < 0.05)
sign_microglia_IFNy_LF <- subset(sign_microglia_IFNy, logFC < -1)
sign_monocytes_IFNy <- subset(res_IFNy_monocytes, adj.P.Val < 0.05)
sign_monocytes_IFNy_LF <- subset(sign_monocytes_IFNy, logFC < -5)

# Fixed: the monocyte table used to be merged with itself, so `overlap` never
# contained the microglia/monocyte intersection.
overlap <- merge(sign_microglia_IFNy_LF, sign_monocytes_IFNy_LF, by = "symbol")
set1 <- sign_microglia_IFNy_LF
set2 <- sign_monocytes_IFNy_LF

set1_v <- set1$symbol
set2_v <- set2$symbol

# setEnrichment() is defined once, in the first overlap section of this
# document (microglia vs monocytes LPS, FDR 5%); the identical copy that was
# repeated here has been dropped.
setEnrichment(set1 = set1_v, set2 = set2_v)

Overlap with significant microglia LPS genes FDR 5% log FC > 1 + IFNy monocytes > 5

# Microglia LPS logFC > 1 vs monocytes IFNy logFC > 5 (both adj.P.Val < 0.05).
sign_microglia_LPS <- subset(res_LPS_name, adj.P.Val < 0.05)
sign_microglia_LPS_LF <- subset(sign_microglia_LPS, logFC > 1)
sign_monocytes_IFNy <- subset(res_IFNy_monocytes, adj.P.Val < 0.05)
sign_monocytes_IFNy_LF <- subset(sign_monocytes_IFNy, logFC > 5)
overlap <- merge(sign_microglia_LPS_LF, sign_monocytes_IFNy_LF, by = "symbol")
set1 <- sign_microglia_LPS_LF
set2 <- sign_monocytes_IFNy_LF

set1_v <- set1$symbol
set2_v <- set2$symbol

# setEnrichment() is defined once, in the first overlap section of this
# document (microglia vs monocytes LPS, FDR 5%); the identical copy that was
# repeated here has been dropped.
setEnrichment(set1 = set1_v, set2 = set2_v)

Overlap with significant microglia LPS genes FDR 5% log FC > 1 + monocytes < -5

# Microglia LPS logFC > 1 vs monocytes IFNy logFC < -5 (both adj.P.Val < 0.05).
sign_microglia_LPS <- subset(res_LPS_name, adj.P.Val < 0.05)
sign_microglia_LPS_LF <- subset(sign_microglia_LPS, logFC > 1)
sign_monocytes_IFNy <- subset(res_IFNy_monocytes, adj.P.Val < 0.05)
sign_monocytes_IFNy_LF <- subset(sign_monocytes_IFNy, logFC < -5)
overlap <- merge(sign_microglia_LPS_LF, sign_monocytes_IFNy_LF, by = "symbol")
set1 <- sign_microglia_LPS_LF
set2 <- sign_monocytes_IFNy_LF

set1_v <- set1$symbol
set2_v <- set2$symbol

# setEnrichment() is defined once, in the first overlap section of this
# document (microglia vs monocytes LPS, FDR 5%); the identical copy that was
# repeated here has been dropped.
setEnrichment(set1 = set1_v, set2 = set2_v)

Overlap with significant microglia IFNy genes FDR 5% log FC > 1 + LPS monocytes > 5

# Microglia IFNy logFC > 1 vs monocytes LPS logFC > 5 (both adj.P.Val < 0.05).
sign_microglia_IFNy <- subset(res_IFNy_name, adj.P.Val < 0.05)
sign_microglia_IFNy_LF <- subset(sign_microglia_IFNy, logFC > 1)
sign_monocytes_LPS <- subset(res_LPS_monocytes, adj.P.Val < 0.05)
sign_monocytes_LPS_LF <- subset(sign_monocytes_LPS, logFC > 5)
overlap <- merge(sign_microglia_IFNy_LF, sign_monocytes_LPS_LF, by = "symbol")
set1 <- sign_microglia_IFNy_LF
set2 <- sign_monocytes_LPS_LF

set1_v <- set1$symbol
set2_v <- set2$symbol

# setEnrichment() is defined once, in the first overlap section of this
# document (microglia vs monocytes LPS, FDR 5%); the identical copy that was
# repeated here has been dropped.
setEnrichment(set1 = set1_v, set2 = set2_v)

Overlap with significant microglia LPS genes FDR 5% log FC > 1 + monocytes < -5

# Microglia LPS logFC > 1 vs monocytes IFNy logFC < -5 (both adj.P.Val < 0.05).
# NOTE(review): this repeats a comparison already made earlier in the document
# under the same heading. The original merge below referenced
# sign_microglia_IFNy_LF, so a different comparison (possibly microglia IFNy vs
# monocytes LPS logFC < -5) may have been intended - confirm.
sign_microglia_LPS <- subset(res_LPS_name, adj.P.Val < 0.05)
sign_microglia_LPS_LF <- subset(sign_microglia_LPS, logFC > 1)
sign_monocytes_IFNy <- subset(res_IFNy_monocytes, adj.P.Val < 0.05)
sign_monocytes_IFNy_LF <- subset(sign_monocytes_IFNy, logFC < -5)

# Fixed: `overlap` was built from the stale sign_microglia_IFNy_LF left over
# from a previous section, which disagreed with set1/set2 below.
overlap <- merge(sign_microglia_LPS_LF, sign_monocytes_IFNy_LF, by = "symbol")
set1 <- sign_microglia_LPS_LF
set2 <- sign_monocytes_IFNy_LF

set1_v <- set1$symbol
set2_v <- set2$symbol

# setEnrichment() is defined once, in the first overlap section of this
# document (microglia vs monocytes LPS, FDR 5%); the identical copy that was
# repeated here has been dropped.
setEnrichment(set1 = set1_v, set2 = set2_v)

Overlap with significant microglia LPS genes FDR 5% log FC > 1 + LPS monocytes < -5

# Microglia LPS logFC > 1 vs monocytes LPS logFC < -5 (both adj.P.Val < 0.05).
sign_microglia_LPS <- subset(res_LPS_name, adj.P.Val < 0.05)
sign_microglia_LPS_LF <- subset(sign_microglia_LPS, logFC > 1)
sign_monocytes_LPS <- subset(res_LPS_monocytes, adj.P.Val < 0.05)
sign_monocytes_LPS_LF <- subset(sign_monocytes_LPS, logFC < -5)
overlap <- merge(sign_microglia_LPS_LF, sign_monocytes_LPS_LF, by = "symbol")
set1 <- sign_microglia_LPS_LF
set2 <- sign_monocytes_LPS_LF

set1_v <- set1$symbol
set2_v <- set2$symbol

# setEnrichment() is defined once, in the first overlap section of this
# document (microglia vs monocytes LPS, FDR 5%); the identical copy that was
# repeated here has been dropped.
setEnrichment(set1 = set1_v, set2 = set2_v)

Overlap with significant microglia LPS genes FDR 5% log FC < - 1 + LPS monocytes > 5

# Microglia LPS logFC < -1 vs monocytes LPS logFC > 5 (both adj.P.Val < 0.05).
sign_microglia_LPS <- subset(res_LPS_name, adj.P.Val < 0.05)
sign_microglia_LPS_LF <- subset(sign_microglia_LPS, logFC < -1)
sign_monocytes_LPS <- subset(res_LPS_monocytes, adj.P.Val < 0.05)
sign_monocytes_LPS_LF <- subset(sign_monocytes_LPS, logFC > 5)
overlap <- merge(sign_microglia_LPS_LF, sign_monocytes_LPS_LF, by = "symbol")
set1 <- sign_microglia_LPS_LF
set2 <- sign_monocytes_LPS_LF

set1_v <- set1$symbol
set2_v <- set2$symbol

# setEnrichment() is defined once, in the first overlap section of this
# document (microglia vs monocytes LPS, FDR 5%); the identical copy that was
# repeated here has been dropped.
setEnrichment(set1 = set1_v, set2 = set2_v)

Overlap with significant microglia IFNy genes FDR 5% log FC > 1 + IFNy monocytes < -5

# Microglia IFNy logFC > 1 vs monocytes IFNy logFC < -5 (both adj.P.Val < 0.05).
sign_microglia_IFNy <- subset(res_IFNy_name, adj.P.Val < 0.05)
sign_microglia_IFNy_LF <- subset(sign_microglia_IFNy, logFC > 1)
sign_monocytes_IFNy <- subset(res_IFNy_monocytes, adj.P.Val < 0.05)
sign_monocytes_IFNy_LF <- subset(sign_monocytes_IFNy, logFC < -5)
overlap <- merge(sign_microglia_IFNy_LF, sign_monocytes_IFNy_LF, by = "symbol")
set1 <- sign_microglia_IFNy_LF
set2 <- sign_monocytes_IFNy_LF

set1_v <- set1$symbol
set2_v <- set2$symbol

# setEnrichment() is defined once, in the first overlap section of this
# document (microglia vs monocytes LPS, FDR 5%); the identical copy that was
# repeated here has been dropped.
setEnrichment(set1 = set1_v, set2 = set2_v)

Overlap with significant microglia LPS genes FDR 5% log FC < - 1 + IFNy monocytes > 5

# Microglia LPS logFC < -1 vs monocytes IFNy logFC > 5 (both adj.P.Val < 0.05).
sign_microglia_LPS <- subset(res_LPS_name, adj.P.Val < 0.05)
sign_microglia_LPS_LF <- subset(sign_microglia_LPS, logFC < -1)
sign_monocytes_IFNy <- subset(res_IFNy_monocytes, adj.P.Val < 0.05)
sign_monocytes_IFNy_LF <- subset(sign_monocytes_IFNy, logFC > 5)
overlap <- merge(sign_microglia_LPS_LF, sign_monocytes_IFNy_LF, by = "symbol")
set1 <- sign_microglia_LPS_LF
set2 <- sign_monocytes_IFNy_LF

set1_v <- set1$symbol
set2_v <- set2$symbol

# setEnrichment() is defined once, in the first overlap section of this
# document (microglia vs monocytes LPS, FDR 5%); the identical copy that was
# repeated here has been dropped.
setEnrichment(set1 = set1_v, set2 = set2_v)

Overlap with significant microglia LPS genes FDR 5% log FC < - 1 + IFNy monocytes < - 5

# Microglia LPS logFC < -1 vs monocytes IFNy logFC < -5 (both adj.P.Val < 0.05).
sign_microglia_LPS <- subset(res_LPS_name, adj.P.Val < 0.05)
sign_microglia_LPS_LF <- subset(sign_microglia_LPS, logFC < -1)
sign_monocytes_IFNy <- subset(res_IFNy_monocytes, adj.P.Val < 0.05)
sign_monocytes_IFNy_LF <- subset(sign_monocytes_IFNy, logFC < -5)
overlap <- merge(sign_microglia_LPS_LF, sign_monocytes_IFNy_LF, by = "symbol")
set1 <- sign_microglia_LPS_LF
set2 <- sign_monocytes_IFNy_LF

set1_v <- set1$symbol
set2_v <- set2$symbol

# setEnrichment() is defined once, in the first overlap section of this
# document (microglia vs monocytes LPS, FDR 5%); the identical copy that was
# repeated here has been dropped.
setEnrichment(set1 = set1_v, set2 = set2_v)

Scatterplot LogFC microglia vs monocytes LPS

#12812 genes

# Join the microglia and monocyte LPS tables on gene symbol.
genes <- merge(res_LPS_name, res_LPS_monocytes, by = "symbol")

# merge() appends ".x" (first table: microglia) and ".y" (second table:
# monocytes) to columns present in both inputs. Selecting logFC by name instead
# of by position (previously columns 3 and 10) keeps the plot correct if the
# column layout of either table changes.
microglia <- genes[["logFC.x"]]
monocytes <- genes[["logFC.y"]]
df <- data.frame(microglia, monocytes)

# 2-D binned density of the two logFC vectors with a linear fit, the identity
# line (dotted) and the Spearman correlation.
p <- ggplot(df, aes(x = microglia, y = monocytes)) +
  geom_smooth(method = "lm", se = FALSE, color = "black", formula = y ~ x) +
  geom_bin2d(bins = 100) +
  scale_fill_continuous(type = "viridis") +
  geom_abline(slope = 1, intercept = 0, linetype = 3) +
  theme_bw()
p + stat_cor(method = "spearman")

Scatterplot LogFC microglia vs monocytes IFNy

#12812 genes

# Join the microglia and monocyte IFNy tables on gene symbol.
genes <- merge(res_IFNy_name, res_IFNy_monocytes, by = "symbol")

# merge() appends ".x" (first table: microglia) and ".y" (second table:
# monocytes) to columns present in both inputs. Selecting logFC by name instead
# of by position (previously columns 3 and 10) keeps the plot correct if the
# column layout of either table changes.
microglia <- genes[["logFC.x"]]
monocytes <- genes[["logFC.y"]]
df <- data.frame(microglia, monocytes)

# 2-D binned density of the two logFC vectors with a linear fit, the identity
# line (dotted) and the Spearman correlation.
p <- ggplot(df, aes(x = microglia, y = monocytes)) +
  geom_smooth(method = "lm", se = FALSE, color = "black", formula = y ~ x) +
  geom_bin2d(bins = 100) +
  scale_fill_continuous(type = "viridis") +
  geom_abline(slope = 1, intercept = 0, linetype = 3) +
  theme_bw()
p + stat_cor(method = "spearman")

Scatter only sign genes LPS

# Genes with adjusted p-value below 0.05 in each cell type (LPS).
sign_microglia <- subset(res_LPS_name, adj.P.Val < 0.05)
sign_monocytes <- subset(res_LPS_monocytes, adj.P.Val < 0.05)

# Of those, the genes with a large effect in either direction.
microglia_LFC <- subset(sign_microglia, abs(logFC) > 1)
monocytes_LFC <- subset(sign_monocytes, abs(logFC) > 5)

# genes2: significant in both; genes3: the same with one row per symbol.
genes2 <- merge(sign_microglia, sign_monocytes, by = "symbol")
genes3 <- genes2[!duplicated(genes2$symbol), ]

# Symbols passing both the significance and the effect-size filters.
genes <- merge(microglia_LFC, monocytes_LFC, by = "symbol")
list(genes$symbol)
## [[1]]
##  [1] "AC002480.1" "AC007991.4" "AC083837.1" "AC084871.4" "ACOD1"     
##  [6] "CASP5"      "CCL3"       "CCL3L1"     "CCL4"       "CD274"     
## [11] "CD80"       "CD9"        "CSF3"       "CXCL1"      "CYP27B1"   
## [16] "EBI3"       "ETV3L"      "GCKR"       "GJB2"       "IDO1"      
## [21] "IL19"       "IL2RA"      "IL36G"      "IL6"        "ITGB8"     
## [26] "LAD1"       "LINC02605"  "MIR3945HG"  "PTGS2"      "SERPINB7"  
## [31] "SLAMF1"     "TNFAIP6"    "TNIP3"      "TRGC1"
# logFC of the genes significant in both cell types. merge() suffixes the
# shared columns with ".x" (microglia) and ".y" (monocytes); they are selected
# by name rather than by position (previously columns 3 and 10).
microglia <- genes3[["logFC.x"]]
monocytes <- genes3[["logFC.y"]]
df <- data.frame(microglia, monocytes)
rownames(df) <- genes3$symbol

# Genes to annotate on the plot; every other point gets an empty label.
# NOTE(review): "SLC7A", "FCG1A" and "TGFM2" may be misspelled symbols (they
# would silently never be labelled) - confirm.
symbol4label <- c("ACOD1", "CD274", "IDO1", "IL15RA", "SLC7A", "IL6", "CCL5", "CASP5", "PTX3", "IFIT2", "IL2RA", "TNFAIP6", "GPR34", "HTRA1", "TTYH3", "TGFB1", "MERTK", "PPARG", "FCG1A", "VAMP5", "TGFM2", "CAMK1G")
genes_m_filt4lab <- df
genes_m_filt4lab$overlap <- rownames(genes_m_filt4lab)
genes_m_filt4lab$overlap[!genes_m_filt4lab$overlap %in% symbol4label] <- ""

p <- ggplot(genes_m_filt4lab, aes(x = microglia, y = monocytes)) +
  geom_point(alpha = .5, color = "orange") +
  geom_hline(yintercept = 0, linetype = "dashed", colour = "black") +
  geom_vline(xintercept = 0, linetype = "dashed", colour = "black") +
  stat_smooth(method = "lm", se = FALSE, color = "black") + # Add Regression Line
  stat_regline_equation(aes(label = ..adj.rr.label..), show.legend = FALSE)
p +
  geom_text_repel(aes(label = overlap), size = 3) +
  easy_labs(
    x = expression(paste("Microglia (", logFC, ")")),
    y = expression(paste("Monocytes (", logFC, ")"))
  ) +
  theme_classic()
## `geom_smooth()` using formula 'y ~ x'
## Warning: ggrepel: 4 unlabeled data points (too many overlaps). Consider
## increasing max.overlaps

Scatter only sign genes IFNy

# Genes with adjusted p-value below 0.05 in each cell type (IFNy).
sign_microglia <- subset(res_IFNy_name, adj.P.Val < 0.05)
sign_monocytes <- subset(res_IFNy_monocytes, adj.P.Val < 0.05)

# Of those, the genes with a large effect in either direction.
microglia_LFC <- subset(sign_microglia, abs(logFC) > 1)
monocytes_LFC <- subset(sign_monocytes, abs(logFC) > 5)

# genes2: significant in both cell types.
genes2 <- merge(sign_microglia, sign_monocytes, by = "symbol")

# Symbols passing both the significance and the effect-size filters.
genes <- merge(microglia_LFC, monocytes_LFC, by = "symbol")
list(genes$symbol)
## [[1]]
##  [1] "AC005515.1" "AC007991.4" "ACOD1"      "AIM2"       "ANKRD22"   
##  [6] "APOL3"      "APOL4"      "BATF2"      "C2"         "CALHM6"    
## [11] "CCL8"       "CD274"      "CXCL10"     "CXCL11"     "CXCL9"     
## [16] "ETV7"       "FCGR1A"     "FCGR1B"     "GBP1"       "GBP1P1"    
## [21] "GBP4"       "GBP5"       "HAPLN3"     "IDO1"       "IFIT3"     
## [26] "IFITM1"     "IL15RA"     "PDCD1LG2"   "RARRES3"    "RSAD2"     
## [31] "SERPING1"   "TNFSF10"
# Keep one row per symbol before using symbols as row names: assigning
# duplicated row names to a data.frame is an error, and the LPS version of this
# section de-duplicates in the same way.
genes2_unique <- genes2[!duplicated(genes2$symbol), ]

# logFC of the genes significant in both cell types. merge() suffixes the
# shared columns with ".x" (microglia) and ".y" (monocytes); they are selected
# by name rather than by position (previously columns 3 and 10).
microglia <- genes2_unique[["logFC.x"]]
monocytes <- genes2_unique[["logFC.y"]]
df <- data.frame(microglia, monocytes)
rownames(df) <- genes2_unique$symbol

# Genes to annotate on the plot; every other point gets an empty label.
# NOTE(review): "CXX5" may be a misspelled symbol (it would silently never be
# labelled) - confirm.
symbol4label <- c("RGS16", "GM2A", "AIM2", "CXCL10", "CXCL9", "IRF1", "C2", "GBP5", "GBP1", "IFIT3", "IDO1", "IFITM1", "IL15RA", "TNFSF10", "APOL3", "APOL4", "CCL8", "GBP4", "RARRES3", "RSAD2", "SERPING1", "FCGR1A", "FCGR1B", "ANKRD22", "CCL24", "TMEM160", "C4orf48", "CXX5", "PID1", "OLR1", "MMP9", "HAS1")
genes_m_filt4lab <- df
genes_m_filt4lab$overlap <- rownames(genes_m_filt4lab)
genes_m_filt4lab$overlap[!genes_m_filt4lab$overlap %in% symbol4label] <- ""

p <- ggplot(genes_m_filt4lab, aes(x = microglia, y = monocytes)) +
  geom_point(alpha = .5, color = "#C71000FF") +
  geom_hline(yintercept = 0, linetype = "dashed", colour = "black") +
  geom_vline(xintercept = 0, linetype = "dashed", colour = "black") +
  stat_smooth(method = "lm", se = FALSE, color = "black") + # Add Regression Line
  stat_regline_equation(aes(label = ..adj.rr.label..), show.legend = FALSE)
p +
  geom_text_repel(aes(label = overlap), size = 3) +
  easy_labs(
    x = expression(paste("Microglia (", logFC, ")")),
    y = expression(paste("Monocytes (", logFC, ")"))
  ) +
  theme_classic()
## `geom_smooth()` using formula 'y ~ x'
## Warning: ggrepel: 10 unlabeled data points (too many overlaps). Consider
## increasing max.overlaps